package util;

import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.safety.Whitelist;

/**
 * @author tms
 * @Date 2023.9.13
 */
public class DealCharacterUtil {

    /**
     * Literal character sequences removed from the content, in removal order:
     * real whitespace characters, escaped control sequences written out as text
     * (a backslash followed by {@code r}, {@code n} or {@code t}), and common
     * HTML entities.
     */
    private static final String[] SPECIAL_SEQUENCES = {
        " ", "\n", "\r", "\t",
        "\\r", "\\n", "\\t",
        "&nbsp;", "&amp;", "&lt;", "&gt;", "&quot;", "&apos;"
    };

    /**
     * Normalizes text by lower-casing it, stripping HTML tags, and removing
     * whitespace, escaped control sequences and common HTML entities.
     *
     * @param content the file content to filter; may be {@code null}
     * @return the filtered content, or {@code null} if {@code content} is {@code null}
     */
    public static String clearSpecialCharacters(String content) {
        // Null in, null out: matches the null-safe StringUtils call below and
        // avoids handing null to Jsoup.
        if (content == null) {
            return null;
        }
        // Convert the content to lower case.
        String cleaned = StringUtils.lowerCase(content);
        // Strip HTML markup; Whitelist.none() permits no tags.
        cleaned = Jsoup.clean(cleaned, Whitelist.none());
        // Remove each sequence literally. String.replace is used instead of
        // replaceAll so the entries are not interpreted as regular expressions
        // (as a regex, "\\n" would match a line feed rather than the two
        // characters backslash + 'n').
        for (String sequence : SPECIAL_SEQUENCES) {
            cleaned = cleaned.replace(sequence, "");
        }
        return cleaned;
    }
}
